clear all


********************************************************************************

//Load data
////////////////////////////////////////////////////////////////////////////////

foreach s of numlist 101 103/107 179/199 {

display in red "************************ Series `s' ************************ "

//Pages 2-3
import excel using "./data sheets/DATA_Series `s'.xlsx", clear sheet("Pages 2-3") cellrange(a3)

***Mine info
{
rename A series
rename B year
rename C district
rename D arrondissement
rename E ordernr
rename F mapnr
rename G mine_name
rename H mine_community
rename I concession_date
rename J concession_ha
rename K concession_ha_prov
rename L operations_active
rename M operations_reserve
rename N operations_construction
rename O pit_name
rename P pit_power

drop if missing(ordernr)
}
***Capital
{
rename Q k_extraction_v_n
rename R k_extraction_m_n
rename S k_extraction_t_n
rename T k_extraction_hp
rename U k_extraction_hn
rename V k_excavation_t_n
rename W k_excavation_p_n
rename X k_excavation_pp_n
rename Y k_excavation_hp
rename Z k_excavation_hn
rename AA k_ventilation

rename AB galleries_transport_n
rename AC galleries_water_n
rename AD horses_underground_n
rename AE horses_surface_n
}
***Labor
{
rename AF l_under_men_n
rename AG l_under_men_w
rename AH l_under_women_n
rename AI l_under_women_w
rename AJ l_under_boys_n
rename AK l_under_boys_w
rename AL l_under_girls_n
rename AM l_under_girls_w
rename AN l_under_total_n
rename AO l_under_total_w
}

tempfile pages23
save `pages23', replace

//Pages 4-5
import excel using "./data sheets/DATA_Series `s'.xlsx", clear sheet("Pages 4-5") cellrange(a3) 

***Mine info
{
rename A district
rename B arrondissement
rename C ordernr
rename D mapnr
rename E mine_name

drop if missing(ordernr)
}

***Labor (2)
{
rename F l_surface_men_n
rename G l_surface_men_w
rename H l_surface_women_n
rename I l_surface_women_w
rename J l_surface_boys_n
rename K l_surface_boys_w
rename L l_surface_girls_n
rename M l_surface_girls_w
rename N l_surface_total_n
rename O l_surface_total_w
rename P l_total_n
}

***Production
{
rename Q q_leans_gr_t
rename R q_leans_gr_p
rename S q_leans_glx_t
rename T q_leans_glx_p
rename U q_leans_mn_t
rename V q_leans_mn_p
rename W q_leans_tot_t

rename X q_drys_gr_t
rename Y q_drys_gr_p
rename Z q_drys_glx_t
rename AA q_drys_glx_p
rename AB q_drys_tot_t

rename AC q_leanl_gr_t
rename AD q_leanl_gr_p
rename AE q_leanl_gal_t
rename AF q_leanl_gal_p
rename AG q_leanl_glx_t
rename AH q_leanl_glx_p
rename AI q_leanl_tot_t

rename AJ q_fatl_gr_t
rename AK q_fatl_gr_p
rename AL q_fatl_glx_t
rename AM q_fatl_glx_p
rename AN q_fatl_tot_t
}

tempfile pages45
save `pages45', replace

//Pages 6-7
import excel using "./data sheets/DATA_Series `s'.xlsx", clear sheet("Pages 6-7") cellrange(a3) 

***Mine info
{
rename A district
rename B arrondissement
rename C ordernr
rename D mapnr
rename E mine_name

drop if missing(ordernr)
}

***Production (2)
{
rename F q_fatty_gr_t
rename G q_fatty_gr_p
rename H q_fatty_glx_t
rename I q_fatty_glx_p
rename J q_fatty_tot_t

rename K q_total_gr_t
rename L q_total_gal_t
rename M q_total_glx_t
rename N q_total_mn_t
rename O q_total_t
rename P q_total_pq
}


***Finances
{
rename Q expenses_l
rename R expenses_other
rename S expenses_total
rename T balance_gain
rename U balance_loss
rename V expenses_prep
rename W observations
}

tempfile pages67
save `pages67', replace

//Merge pages
clear
use `pages23'
merge 1:1 district arrondissement ordernr using `pages45', nogen
merge 1:1 district arrondissement ordernr using `pages67', nogen

{
***Test consistency production categories: OK
/*
keep q_*_t
global term "gr tot"
foreach x in $term {
egen test_`x' = rowtotal(q_leans_`x'_t q_drys_`x'_t  q_leanl_`x'_t  q_fatl_`x'_t  q_fatty_`x'_t )
}
egen test_gal = rowtotal(/*q_leans_gal_t q_drys_gal_t*/  q_leanl_gal_t  /*q_fatl_gal_t  q_fatty_gal_t*/)
egen test_glx = rowtotal(q_leans_glx_t q_drys_glx_t  q_leanl_glx_t  /*q_fatl_glx_t*/  q_fatty_glx_t )
egen test_mn = rowtotal(q_leans_mn_t /*q_drys_mn_t  q_leanl_mn_t  q_fatl_mn_t  q_fatty_mn_t*/ )

keep test_gr q_total_gr_t test_gal q_total_gal_t test_glx q_total_glx_t test_mn q_total_mn_t test_tot q_total_tot_t
order test_gr q_total_gr_t test_gal q_total_gal_t test_glx q_total_glx_t test_mn q_total_mn_t test_tot q_total_tot_t
*/
}

//String variables
////////////////////////////////////////////////////////////////////////////////
tostring arrondissement, replace
tostring pit_name pit_power, replace
tostring k_extraction_*_n k_excavation_*_n, replace
tostring k_ventilation, replace


//Resolve multiple figures in one cell
////////////////////////////////////////////////////////////////////////////////

//Average out if there are multiple wages
global varlist_under "l_under_men_w l_under_women_w l_under_boys_w l_under_girls_w l_under_total_w"
global varlist_surface " l_surface_men_w l_surface_women_w l_surface_boys_w l_surface_girls_w l_surface_total_w"
foreach var in $varlist_under $varlist_surface {

capture confirm numeric variable `var'
if !_rc {
display in red "OK"
}
else {
gen `var'_1 = trim(substr(`var',1,strpos(`var',",")-1)) if strpos(`var',",")>0
gen `var'_2 = trim(substr(`var',strpos(`var',",")+1,.)) if strpos(`var',",")>0
replace `var'_1 = `var' if strpos(`var',",")==0
replace `var'_2 =  `var' if strpos(`var',",")==0
destring `var'_*, replace
rename `var' `var'_org
gen `var' = (`var'_1 + `var'_2)/2
order `var' , after(`var'_org)
drop `var'_1 `var'_2
}
}

//Average (weighted!) prices/quantities production if there are multiple prices
global varlist_t "q_leans_gr_t  q_leans_glx_t  q_leans_mn_t q_drys_gr_t  q_drys_glx_t  q_leanl_gr_t  q_leanl_gal_t  q_leanl_glx_t  q_fatl_gr_t  q_fatl_glx_t  q_fatty_gr_t  q_fatty_glx_t"
global varlist_p "q_leans_gr_p  q_leans_glx_p  q_leans_mn_p q_drys_gr_p  q_drys_glx_p  q_leanl_gr_p  q_leanl_gal_p  q_leanl_glx_p  q_fatl_gr_p  q_fatl_glx_p  q_fatty_gr_p  q_fatty_glx_p"
global term "q_leans_gr  q_leans_glx  q_leans_mn q_drys_gr  q_drys_glx  q_leanl_gr  q_leanl_gal  q_leanl_glx  q_fatl_gr  q_fatl_glx  q_fatty_gr  q_fatty_glx"

foreach term in $term {
capture confirm numeric variable `term'_t
if !_rc {
display in red "OK"
}
else {

global pq "p t"
foreach x in $pq {
gen `term'_`x'_1 = trim(substr(`term'_`x',1,strpos(`term'_`x',",")-1)) if strpos(`term'_`x',",")>0
gen `term'_`x'_2 = trim(substr(`term'_`x',strpos(`term'_`x',",")+1,.)) if strpos(`term'_`x',",")>0
destring `term'_`x'_*, replace
destring `term'_`x', force gen(`term'_`x'_temp)
}

rename `term'_p `term'_p_org
gen `term'_p = (`term'_p_1*`term'_t_1 + `term'_p_2*`term'_t_2) / (`term'_t_1+`term'_t_2)
replace `term'_p = `term'_p_temp if !missing(`term'_p_temp)
order `term'_p, after(`term'_p_org)

rename `term'_t `term'_t_org
egen `term'_t = rowtotal(`term'_t_1 `term'_t_2)
replace `term'_t = . if `term'_t==0
replace `term'_t = `term'_t_temp if !missing(`term'_t_temp)
order `term'_t, after(`term'_t_org)

drop `term'_p_1 `term'_p_2 `term'_t_1 `term'_t_2 `term'_*_temp 
}
}


//Aggregate totals prices/quantities 
global varlist_tot "q_leans_tot_t q_drys_tot_t q_leanl_tot_t q_fatl_tot_t q_fatty_tot_t"
foreach var in $varlist_tot {

capture confirm numeric variable `var'
if !_rc {
display in red "OK"
}
else {
gen `var'_1 = trim(substr(`var',1,strpos(`var',",")-1)) if strpos(`var',",")>0
gen `var'_2 = trim(substr(`var',strpos(`var',",")+1,.)) if strpos(`var',",")>0
destring `var'_*, replace
destring `var', force gen(`var'_temp)
rename `var' `var'_org
egen `var' = rowtotal(`var'_1 `var'_2)
replace `var' = . if `var'==0
replace `var' = `var'_temp if !missing(`var')
order `var' , after(`var'_org)
drop `var'_1 `var'_2 `var'_temp
}
}


//Remove average wages from l_total_n
capture confirm numeric variable l_total_n
if !_rc {
display in red "OK"
}
else {
replace l_total_n = trim(substr(l_total_n,1,strpos(l_total_n,",")-1)) if strpos(l_total_n,",")>0
destring l_total_n, replace
}

//Seperate "C M" (moulettes) 
capture confirm numeric variable k_extraction_hp
if !_rc {
display in red "OK"
}
else {
gen k_extraction_hp_m = trim(substr(k_extraction_hp,strpos(k_extraction_hp,",")+1,.)) if strpos(k_extraction_hp,",")>0
replace k_extraction_hp = trim(substr(k_extraction_hp,1,strpos(k_extraction_hp,",")-1)) if strpos(k_extraction_hp,",")>0
destring k_extraction_hp k_extraction_hp_m, replace
order k_extraction_hp_m, after(k_extraction_hp)
}

//Aggregate "f" and "h" in "C H" (extraction) 
capture confirm numeric variable k_extraction_hn 
if !_rc {
display in red "OK"
}
else {
gen k_extraction_hn_1 = trim(substr(k_extraction_hn,1,strpos(k_extraction_hn,",")-1)) if strpos(k_extraction_hn,",")>0
gen k_extraction_hn_2 = trim(substr(k_extraction_hn,strpos(k_extraction_hn,",")+1,.)) if strpos(k_extraction_hn,",")>0
destring k_extraction_hn_*, replace
destring k_extraction_hn, force gen(k_extraction_hn_temp)
rename k_extraction_hn k_extraction_hn_org
egen k_extraction_hn = rowtotal(k_extraction_hn_1 k_extraction_hn_2)
replace k_extraction_hn = . if k_extraction_hn==0
replace k_extraction_hn = k_extraction_hn_temp if !missing(k_extraction_hn_temp)
order k_extraction_hn , after(k_extraction_hn_org)
drop k_extraction_hn_1 k_extraction_hn_2 k_extraction_hn_temp
}


//Seperate "Operations active" 
capture confirm numeric variable operations_active
if !_rc {
display in red "OK"
}
else {
gen operations_active_ha = trim(substr(operations_active,strpos(operations_active,",")+1,.)) if strpos(operations_active,",")>0
replace operations_active = trim(substr(operations_active,1,strpos(operations_active,",")-1)) if strpos(operations_active,",")>0
destring operations_active, replace
order operations_active_ha, after(operations_active) 
}


tempfile data`s'
save `data`s'', replace
}


//Merge series
////////////////////////////////////////////////////////////////////////////////
use `data101', clear

foreach s of numlist 103/107 179/199 {
display in red "** `s' **"
append using `data`s''
}
order k_extraction_hp_m, after(k_extraction_hp)
drop AP AQ AR AS AT //Empty rows

save data_101-107_179-199.dta, replace

exit
